Exploration of Structural Balance Theory

In [1]:
# preambles
import cPickle as pickle
import os
import subprocess

import networkx as nx

# load pickled network file
pickle_name = 'Data_world8_network.pckl'
pickle_dir = 'C:\\Users\\FG\\Desktop\\PhD\\Research\\reddit\\Pickled Data'
reddit_network = pickle.load( open(pickle_dir + os.sep + pickle_name, "rb") )
print "loaded reddit network from ", pickle_dir + os.sep + pickle_name


loaded reddit network from  C:\Users\FG\Desktop\PhD\Research\reddit\Pickled Data\Data_world8_network.pckl

In [2]:
import matplotlib.pyplot as plt
%matplotlib inline

In [3]:
# Build the undirected user graph from the pickled network.
reddit_graph = nx.Graph(name='Reddit Graph')

# An edge needs at least this many messages stored under network[a][b].
min_messages = 2

# Nodes are created implicitly by add_edge.
for user_a, contacts in reddit_network.items():
    for user_b, messages in contacts.items():
        if len(messages) < min_messages:
            continue
        if user_a == user_b:
            # no self-loops
            continue
        reddit_graph.add_edge(user_a, user_b)

# Keep only the largest connected component (by node count).
component_graphs = list(nx.connected_component_subgraphs(reddit_graph))
component_graphs.sort(key=len, reverse=True)
reddit_graph = component_graphs[0]

In [4]:
# Stats
# print 'degrees:',nx.degree_histogram(reddit_graph)
print 'info: ', nx.info(reddit_graph)
print 'density: ', nx.density(reddit_graph), ' (0-1 scale, 0 for empty graph, 1 for complete graph)'
# triangles
triangles_dict = nx.triangles(reddit_graph)
nodes_in_triangles = [node for node in reddit_graph.nodes() if triangles_dict[node]>=1]
print '# of triangles in graph:', sum(nx.triangles(reddit_graph).values())/3

# plot the degre dist
print "Degree Distribution"
hist=nx.degree_histogram(reddit_graph)
plt.bar(range(len(hist)), hist, align='center')
plt.xlim(0,30)
plt.show()


info:  Name: Reddit Graph
Type: Graph
Number of nodes: 6804
Number of edges: 9279
Average degree:   2.7275
density:  0.000400928006396  (0-1 scale, 0 for empty graph, 1 for complete graph)
# of triangles in graph: 151
Degree Distribution

In [5]:
# extract list of triangles
triangle_list=[] 
done=set()  
for n in reddit_graph: 
    done.add(n)    # 
    nbrdone=set()    # 
    nbrs=set(reddit_graph[n]) 
    for nbr in nbrs: 
        if nbr in done:    # 
            continue    # 
        nbrdone.add(nbr)    # 
        for both in nbrs.intersection(reddit_graph[nbr]): 
            if both in done or both in nbrdone:    # 
                continue    # 
            triangle_list.append( (n,nbr,both) )

print len(triangle_list)


151

In [6]:
#reconstruct graph from triangles (with only edges in those triangles)

from itertools import combinations

# Graph that contains only the edges belonging to at least one triangle.
triangles_reddit_graph = nx.Graph(name='Triangles Reddit Graph')

for triangle in triangle_list:
    # the three node pairs of a triangle are its three edges
    for node_pair in combinations(triangle, 2):
        triangles_reddit_graph.add_edge(*node_pair)

In [7]:
# Stats
# print 'degrees:',nx.degree_histogram(reddit_graph)
print 'info: ', nx.info(triangles_reddit_graph)
print 'density: ', nx.density(triangles_reddit_graph), ' (0-1 scale, 0 for empty graph, 1 for complete graph)'
# triangles
triangles_dict = nx.triangles(triangles_reddit_graph)
nodes_in_triangles = [node for node in triangles_reddit_graph.nodes() if triangles_dict[node]>=1]
print '# of triangles in graph:', sum(nx.triangles(triangles_reddit_graph).values())/3

# plot the degre dist
print "Degree Distribution"
hist=nx.degree_histogram(triangles_reddit_graph)
plt.bar(range(len(hist)), hist, align='center')
plt.xlim(0,30)
plt.show()


info:  Name: Triangles Reddit Graph
Type: Graph
Number of nodes: 243
Number of edges: 377
Average degree:   3.1029
density:  0.0128218209026  (0-1 scale, 0 for empty graph, 1 for complete graph)
# of triangles in graph: 151
Degree Distribution

In [8]:
# distribution of types of triangles in a random network
# right half of table 4 in paper

In [9]:
import random

# Give every edge a random polarity: -1 (negative) or +1 (positive).
# NOTE(review): no random.seed() call is visible in this notebook, so the
# assignment differs between runs.
for node_a, node_b in triangles_reddit_graph.edges():
    coin = random.randint(0, 1)
    triangles_reddit_graph.edge[node_a][node_b]['polarity'] = 2 * coin - 1

In [10]:
# analyse triangle types
from itertools import combinations

# Tally of +++, ++-, +--, --- triangles.
counts = {'+++':0, '++-':0, '+--':0, '---':0}
# With every edge at +1 or -1, the sum over the three edges identifies the type.
label_by_total = {3: '+++', 1: '++-', -1: '+--', -3: '---'}

for triangle in triangle_list:
    tot = 0
    for node_a, node_b in combinations(triangle, 2):
        tot += triangles_reddit_graph.edge[node_a][node_b]['polarity']

    label = label_by_total.get(tot)
    if label is None:
        # any other sum means some edge polarity is not +1/-1
        print "ERROR"
    else:
        counts[label] += 1

print "counts in a randomized network: ", counts


counts in a randomized network:  {'++-': 56, '---': 17, '+--': 60, '+++': 18}

In [11]:
# apply opinion finder to each edge

import string

for edge in triangles_reddit_graph.edges():
    # extract text data
    userA, userB = edge[0],edge[1]
    # print userA, userB
    text_list = reddit_network[userA][userB]
    #print text_list
    
    # save text to temp file
    with open("tmp.txt", "w") as text_file:
        for msg in text_list: 
            # remove non-ascii for opinionfinder
            msg_filtered = filter(lambda x: x in string.printable, msg)
#             print msg_filtered
            text_file.write(msg_filtered + '\n')
            
    # analyse polarity
    os.system('java -classpath opinionfinderv2.0\lib\weka.jar;opinionfinderv2.0\lib\stanford-postagger.jar;opinionfinderv2.0\opinionfinder.jar opin.main.RunOpinionFinder tmp.txt -m opinionfinderv2.0\models -l  opinionfinderv2.0\lexicons')
    
    # open polarity results
    with open("tmp.txt_auto_anns\markup.txt", "r") as res_file:
        content = res_file.read()
        positives = content.count("positive")
        negatives = content.count("negative")
        neutrals = content.count("neutral")
        print userA, userB, positives, negatives, neutrals
        
        # assign polarity
        if negatives>positives:
            polarity_val = -1
        elif negatives<positives:
            polarity_val = 1
        else:
            polarity_val = 0
        
        triangles_reddit_graph.edge[userA][userB]['polarity'] = polarity_val

print "DONE"


speedisavirus Reditski 0 1 10
speedisavirus ISeeSharp 0 1 7
Tripwire3 vecnyj 0 1 6
Tripwire3 grammaryan 1 1 13
AdClemson TheLightningbolt 1 5 11
AdClemson jkess04 2 1 30
most_original_nick PreciseCobra 2 13 23
most_original_nick puffpuff9 3 2 8
MrGraeme holocauster-ride 0 0 2
MrGraeme Darius1618 3 10 22
lktgrss tehbored 1 0 5
lktgrss mrnovember5 1 3 22
artureposir Maso_del_Saggio 1 0 10
artureposir TangoJager 1 0 4
artureposir improb 0 2 2
critfist holocauster-ride 1 2 8
critfist Hamartolus 2 0 5
my__name__is 50ShadesOfPatriotic 1 0 9
my__name__is damndirtylies 2 1 5
orru toomanynoobs 0 1 4
orru particle409 3 7 27
NinjaDiscoJesus bitofnewsbot 4 19 91
NinjaDiscoJesus Wagamaga 5 16 4
sternee GhostOfWhatsIAName 0 5 6
sternee MaltyBeverage 3 12 23
iMark70 1x10_-24 0 0 1
iMark70 Wagamaga 0 2 7
midasz JohnMiltonJamesJoyce 0 1 4
midasz retardcharizard 1 0 3
bigdeal42 damndirtylies 0 1 4
bigdeal42 trolls_brigade 0 4 21
puffpuff9 PreciseCobra 13 10 6
InternetPropagandist itsfineitsgreat 3 4 22
InternetPropagandist TooLoudToSilence 0 1 4
RaikerCat bigplrbear 0 0 6
RaikerCat Mad_Jukes 0 1 6
Conflict_Observer 1x10_-24 2 1 11
Conflict_Observer putupyourdukes 0 0 1
Dividedstein Israil_Akbar 0 1 2
Dividedstein ZionistShark 1 4 4
jonzobot Brett686 3 5 13
jonzobot ultraspontane 1 3 9
_Perfectionist DrBoomkin 1 0 3
_Perfectionist i_love_hezbollah 0 17 52
mehdika Tachels 3 0 6
mehdika heavyyy 3 3 5
mehdika Mordredbas 2 6 17
RabidRaccoon oneofmanyshills 20 40 236
RabidRaccoon an_actual_lawyer 0 0 8
RabidRaccoon FnordFinder 4 0 20
RabidRaccoon Riash 1 0 4
RabidRaccoon DineLointHarpie 10 30 62
RabidRaccoon Slyndrr 0 2 7
RabidRaccoon Sleekery 1 0 7
RabidRaccoon Romek_himself 0 0 4
RabidRaccoon BlackTeaLeaves 2 3 15
RabidRaccoon varietygamer 3 4 20
itsfineitsgreat I_love_Israel 0 0 8
itsfineitsgreat koolaidkirby 0 0 12
itsfineitsgreat kulkke 1 2 8
itsfineitsgreat TooLoudToSilence 2 17 49
project_a_jackie Hamartolus 4 5 13
project_a_jackie kriegson 1 5 6
090078601 ahyuknyuk 14 30 118
090078601 triggerthedigger 2 1 2
Lionstrong vaguerant107 0 3 6
Lionstrong ForFUCKSSAKE_ 9 13 46
readerseven 1x10_-24 3 2 11
readerseven StargateParadox 0 0 8
readerseven OB1_kenobi 0 2 14
moushoo NoHorseInThisRace 0 0 1
moushoo uncannylizard 1 2 6
moushoo vainglory7 7 3 16
moushoo StabbiRabbi 19 15 93
moushoo Batatata 1 2 9
moushoo i_love_hezbollah 0 0 7
soon2beaher orr250mph 1 2 2
soon2beaher DrSalted 0 0 2
JudLew rosinthebow 0 1 1
JudLew uncannylizard 2 6 31
razerxs bitofnewsbot 1 3 34
razerxs Wagamaga 3 6 27
Nautil DrivenDogged 4 5 42
Nautil Hagtzel 8 5 40
sphere2040 xanadu_reloaded 0 2 5
sphere2040 InternetOfficer 1 2 7
kriegson machinedog 3 4 8
kriegson Hamartolus 2 7 16
kriegson Zenarchist 0 2 5
1x10_-24 Flower_Ninja 0 1 3
1x10_-24 poonhounds 0 0 2
1x10_-24 JamesColesPardon 0 4 12
1x10_-24 zahrul3 1 0 4
1x10_-24 PixelBlaster 0 0 2
1x10_-24 lordderplythethird 0 0 3
1x10_-24 TooLoudToSilence 0 4 3
1x10_-24 putupyourdukes 0 0 2
1x10_-24 quiteintriguing 1 3 14
1x10_-24 madazzahatter 1 5 21
1x10_-24 HunterSThompson_says 0 1 2
1x10_-24 Wagamaga 0 0 8
1x10_-24 StargateParadox 1 2 4
1x10_-24 holocauster-ride 0 4 6
1x10_-24 herticalt 3 11 23
1x10_-24 OB1_kenobi 0 0 9
zolzks yes_thats_right 1 19 30
zolzks Wagamaga 0 4 9
Flower_Ninja OB1_kenobi 2 2 11
VulvaVulva holocauster-ride 3 14 17
VulvaVulva eazye187 0 0 3
VulvaVulva an_actual_lawyer 0 1 10
VulvaVulva Oreo_Speedwagon 2 10 9
Carter1116 holocauster-ride 0 2 6
Carter1116 AlmostTheNewestDad 0 0 3
Darius1618 holocauster-ride 1 3 5
Darius1618 xanadu_reloaded 1 0 5
Darius1618 Hamartolus 1 1 9
Darius1618 Sir_Beelzebub 3 0 2
toomanynoobs StabbiRabbi 2 7 4
toomanynoobs particle409 2 8 14
toomanynoobs Garet-Jax 0 1 2
i-no-u-no-i-no cock_pussy_up 1 0 4
i-no-u-no-i-no Wagamaga 6 3 22
crazehoarse busdriverbuddha 0 14 68
crazehoarse hellzorak 0 0 4
Tachels uncannylizard 1 3 9
Tachels Mordredbas 2 3 5
Tachels NoHorseInThisRace 1 5 22
koolaidkirby I_love_Israel 1 2 11
yes_thats_right Wagamaga 3 0 16
eazye187 holocauster-ride 4 2 19
eazye187 putupyourdukes 1 0 9
eazye187 guanaco55 0 5 9
Kahing StabbiRabbi 14 31 38
Kahing i_love_hezbollah 1 0 8
machinedog Zenarchist 4 9 42
tehbored mrnovember5 1 12 4
Batatata vainglory7 0 0 5
vecnyj grammaryan 1 2 13
TheLastOfYou holocauster-ride 1 13 20
TheLastOfYou Boris45 2 0 6
TheLastOfYou Wisdom_from_the_Ages 4 19 52
TheLastOfYou guanaco55 1 4 7
Mosetsfire84 hpsyk 3 11 63
Mosetsfire84 EatingSandwiches1 0 0 7
JohnMiltonJamesJoyce MonsieurAnon 0 0 3
JohnMiltonJamesJoyce KnotPtelling 0 2 5
JohnMiltonJamesJoyce retardcharizard 4 1 18
ComplexityAhhoy Tacoman404 0 1 14
ComplexityAhhoy Romek_himself 0 4 4
ComplexityAhhoy MongolPerson 2 26 78
WiseChoices 50ShadesOfPatriotic 3 3 7
WiseChoices DoremusJessup 0 0 3
Mordredbas heavyyy 5 16 55
ZionistShark 50ShadesOfPatriotic 0 2 1
ZionistShark Israil_Akbar 1 8 9
ZionistShark i_love_hezbollah 0 1 8
bitofnewsbot DrSalted 2 8 77
bitofnewsbot Wagamaga 1 8 75
oneofmanyshills ThatGetItKid 3 3 20
oneofmanyshills Slyndrr 7 24 47
oneofmanyshills gonnaupvote3 3 11 87
50ShadesOfPatriotic kwonza 1 2 10
50ShadesOfPatriotic jaaaack 0 1 13
50ShadesOfPatriotic BannedByZionists 0 4 23
50ShadesOfPatriotic trolls_brigade 0 0 1
50ShadesOfPatriotic plato1123 2 2 3
50ShadesOfPatriotic StabbiRabbi 2 1 22
50ShadesOfPatriotic Computer_Name 0 0 4
50ShadesOfPatriotic damndirtylies 5 3 20
50ShadesOfPatriotic vigorous 2 2 11
50ShadesOfPatriotic DoremusJessup 1 2 4
50ShadesOfPatriotic Hamartolus 2 8 26
50ShadesOfPatriotic i_love_hezbollah 1 3 3
50ShadesOfPatriotic Cantmemba 2 4 19
sansaset putupyourdukes 3 3 9
sansaset jaywalker32 1 1 3
DrivenDogged steelnuts 12 25 64
DrivenDogged Hagtzel 4 11 39
DrivenDogged endprism 0 0 1
hippylarvae damndirtylies 1 7 9
hippylarvae putupyourdukes 0 13 6
hippylarvae WillRedditForBitcoin 1 7 8
ultraspontane Brett686 9 25 58
holocauster-ride chuckaway9 0 3 10
holocauster-ride Boris45 5 9 56
holocauster-ride kslusherplantman 0 0 4
holocauster-ride Tatalebuj 1 0 9
holocauster-ride PixelBlaster 0 2 6
holocauster-ride AlmostTheNewestDad 0 3 1
holocauster-ride putupyourdukes 0 1 2
holocauster-ride Wisdom_from_the_Ages 2 0 5
holocauster-ride TooLoudToSilence 0 0 1
holocauster-ride Smoke_Meth_in_Butt 0 0 10
holocauster-ride herticalt 1 5 7
holocauster-ride Hamartolus 1 2 4
holocauster-ride guanaco55 3 0 4
lukeyflukey Smoke_Meth_in_Butt 2 2 5
lukeyflukey Hamartolus 0 0 3
GhostOfWhatsIAName MaltyBeverage 1 6 19
MonsieurAnon KnotPtelling 0 2 1
MonsieurAnon arqoi 0 1 3
tigersharkwushen_ dafez7 0 0 6
tigersharkwushen_ MaltyBeverage 0 0 6
NSA_ActiveMonitor gethighandthink 0 0 1
NSA_ActiveMonitor Ameri-KKK-aSucksMan 5 5 28
CIKAFIUMPH HoliHandGrenades 1 1 4
CIKAFIUMPH ZachofFables 0 0 2
AngryVegetable umakemefunny 2 3 10
AngryVegetable ripcitybitch 0 2 3
kefeer jheohdgs 0 1 8
kefeer varietygamer 0 0 4
kefeer putupyourdukes 0 0 2
kslusherplantman herticalt 0 1 5
ISeeSharp Reditski 1 1 16
gonnaupvote3 ThatGetItKid 6 5 13
damndirtylies trolls_brigade 0 2 6
damndirtylies putupyourdukes 0 2 7
kwonza trolls_brigade 0 0 7
jdscarface herticalt 0 8 23
jdscarface varietygamer 1 0 7
jdscarface putupyourdukes 0 1 3
DrBoomkin DoctorExplosion 0 2 14
DrBoomkin StevefromRetail 3 4 40
DrBoomkin hpsyk 0 1 10
DrBoomkin failbotron 6 10 53
DrBoomkin RufusTheFirefly 0 0 14
DrBoomkin i_love_hezbollah 1 1 5
DrBoomkin OB1_kenobi 0 0 4
WillRedditForBitcoin putupyourdukes 0 0 7
jaywalker32 putupyourdukes 0 0 5
Loki-L Drak_is_Right 2 2 5
Loki-L klug3 0 1 3
HunterSThompson_says quiteintriguing 8 12 31
HunterSThompson_says TooLoudToSilence 0 0 4
RufusTheFirefly zahrul3 0 0 11
RufusTheFirefly hpsyk 0 0 5
RufusTheFirefly OB1_kenobi 0 5 16
CVSer Shoudlaz 3 38 61
CVSer Computer_Name 8 13 33
DarthQuerious boomanwho 3 15 35
DarthQuerious Tatalebuj 1 0 9
Cantmemba jaaaack 0 1 5
RoswellSpaceman umakemefunny 1 1 8
RoswellSpaceman EatingSandwiches1 3 4 8
gethighandthink Ameri-KKK-aSucksMan 2 1 14
poonhounds TooLoudToSilence 2 5 10
SilentTyrant1 randomfact8472 1 4 6
SilentTyrant1 TheCeilingisGreen 2 1 4
DineLointHarpie Slyndrr 9 29 65
TangoJager Maso_del_Saggio 0 0 3
Recoi1 Romek_himself 0 2 4
Recoi1 MongolPerson 0 5 7
Shoudlaz Computer_Name 0 1 0
madazzahatter StargateParadox 0 0 3
madazzahatter OB1_kenobi 0 0 5
HitlersFleshlight Reditski 0 0 8
HitlersFleshlight Hamartolus 0 2 8
umakemefunny ripcitybitch 4 6 24
umakemefunny EatingSandwiches1 1 4 6
existentialadvisor MongolPerson 0 1 5
existentialadvisor Perniciouss 7 7 58
georgevader Ithikari 0 6 15
georgevader vintruvian 2 5 13
xHaGGeNx Garet-Jax 1 2 22
xHaGGeNx LurkerEurope 2 5 12
MongolPerson putupyourdukes 1 0 2
MongolPerson Perniciouss 1 15 27
MongolPerson Tacoman404 0 1 3
MongolPerson Romek_himself 2 3 7
Smoke_Meth_in_Butt Hamartolus 2 5 12
i_love_hezbollah richjew 3 6 13
i_love_hezbollah TheLastSovietSniper 0 0 4
i_love_hezbollah BannedByZionists 2 11 6
i_love_hezbollah StabbiRabbi 24 50 25
NewtonianCooking Sleekery 9 17 25
NewtonianCooking rindindin 1 1 18
NewtonianCooking DoremusJessup 0 0 4
NewtonianCooking cityofkern 4 0 9
bigplrbear Mad_Jukes 0 3 12
Reditski Hamartolus 6 2 23
richjew TheLastSovietSniper 0 0 1
rindindin DoremusJessup 0 1 11
TheCeilingisGreen randomfact8472 0 1 6
Garet-Jax aminice 1 1 2
Garet-Jax StabbiRabbi 4 10 15
Garet-Jax moskonia 0 0 2
Garet-Jax LurkerEurope 2 0 16
trolls_brigade vigorous 0 1 5
plato1123 Computer_Name 1 0 1
an_actual_lawyer BlackTeaLeaves 9 7 24
an_actual_lawyer Romek_himself 2 1 9
an_actual_lawyer Oreo_Speedwagon 7 14 68
Zodiac13 Computer_Name 0 2 2
Zodiac13 QUAD_PENETRATION 0 0 1
vintruvian Ithikari 1 1 8
arqoi KnotPtelling 1 3 5
admronoc jackrousseau 2 4 15
admronoc dragon_nipples 4 3 19
hellzorak busdriverbuddha 7 3 5
jwax33 varietygamer 0 0 10
jwax33 OB1_kenobi 0 1 15
NoHorseInThisRace uncannylizard 0 0 2
TheLightningbolt rosinthebow 5 3 11
TheLightningbolt akingpa 2 10 38
TheLightningbolt jkess04 1 6 30
TastyLipid thatsmrdickface 0 3 24
TastyLipid Fuadius 0 0 4
prutopls rubyroacher 4 6 57
prutopls AndrewFGleich 1 0 10
orr250mph DrSalted 0 0 1
putupyourdukes fredy5 9 1 7
putupyourdukes varietygamer 1 1 9
putupyourdukes Romek_himself 0 1 5
moskonia aminice 5 6 26
BlackTeaLeaves Sleekery 1 1 1
zahrul3 OB1_kenobi 0 0 2
guanaco55 chuckaway9 0 1 4
xanadu_reloaded Sir_Beelzebub 1 2 5
xanadu_reloaded InternetOfficer 0 3 4
vigorous Hamartolus 1 2 34
hpsyk DoctorExplosion 2 1 8
hpsyk StevefromRetail 4 11 26
hpsyk EatingSandwiches1 3 1 8
jkess04 akingpa 0 1 3
Blindbandit809 mrstickball 0 0 6
Blindbandit809 77bc 3 8 21
uncannylizard rosinthebow 0 1 0
cityofkern Sleekery 1 4 2
dafez7 MaltyBeverage 1 3 15
Maso_del_Saggio improb 0 5 38
rosinthebow akingpa 0 3 6
cock_pussy_up Wagamaga 0 1 10
failbotron StevefromRetail 2 2 34
Computer_Name QUAD_PENETRATION 0 0 1
Hagtzel fandrei 1 0 11
Hagtzel BrightTomorrow 2 11 47
fredy5 varietygamer 3 2 34
Tatalebuj boomanwho 12 26 99
Tatalebuj TooLoudToSilence 9 21 30
AlmostTheNewestDad herticalt 3 6 12
HoliHandGrenades ZachofFables 0 0 0
TranslationRussian bell_peppers_n_beef 0 0 4
TranslationRussian infiniZii 3 5 19
mrstickball 77bc 1 6 21
herticalt Rosalee 4 25 36
herticalt JamesColesPardon 15 28 131
herticalt DrSalted 3 2 5
herticalt Wagamaga 7 13 73
herticalt varietygamer 0 2 8
Slyndrr FnordFinder 0 8 13
klug3 Drak_is_Right 0 1 2
rubyroacher AndrewFGleich 5 11 31
CitationX_N7V11C DrSalted 0 0 2
CitationX_N7V11C Wagamaga 1 1 8
dragon_nipples jackrousseau 4 8 68
varietygamer Music_King 5 2 17
varietygamer jheohdgs 0 1 1
varietygamer OB1_kenobi 3 10 23
varietygamer Riash 0 0 7
varietygamer eskimobrother319 0 0 9
vaguerant107 ForFUCKSSAKE_ 3 0 13
endprism steelnuts 3 1 4
OB1_kenobi StevefromRetail 1 2 11
OB1_kenobi Music_King 0 0 2
OB1_kenobi eskimobrother319 3 6 17
JamesColesPardon Rosalee 1 0 6
BrightTomorrow fandrei 0 0 9
lordderplythethird Wagamaga 6 14 75
TooLoudToSilence kulkke 0 3 2
Allthewaylive215 panzerlieder 0 1 12
Allthewaylive215 chabanais 0 0 1
Fuadius thatsmrdickface 0 4 17
ahyuknyuk triggerthedigger 0 3 7
chabanais panzerlieder 1 0 0
DrSalted Wagamaga 1 4 4
bell_peppers_n_beef infiniZii 0 1 8
DONE

In [12]:
# analyse triangle types
# counts of +++, ++-, +--, --- triangles respectively
counts = {'+++':0, '++-':0, '+--':0, '---':0}

for triangle in triangle_list:
    pairs  = combinations(triangle, 2)
    tot = 0
    for pair in pairs:
        tot += triangles_reddit_graph.edge[pair[0]][pair[1]]['polarity']
    
    if tot == 3:
        counts['+++'] += 1
    elif tot == 1:
        counts['++-'] += 1
    elif tot == -1:
         counts['+--'] += 1
    elif tot == -3:
        counts['---'] += 1
    else:
       pass

print "counts in a labeled network: ", counts


counts in a labeled network:  {'++-': 11, '---': 34, '+--': 48, '+++': 1}

In [38]:
# Subgroup detection aproximation algorithm

# param from paper
alpha = 0.5
max_cluster_num = 5

#network to work on
work_graph = triangles_reddit_graph.copy()
best_cluster_graph = work_graph.copy()

best_price = float("inf")

# for each cluster size >=2
for t in range(max_cluster_num+1)[2:]:
    
    # partition network into t clusters randomly
    # (assign random value to node)
    for node in work_graph.nodes():
        work_graph.node[node]['cluster'] = random.randint(1,t)
         
    # compute price of partition
    cur_price = 0
    for edge in work_graph.edges():
        userA, userB, pol = edge[0], edge[1], work_graph.edge[edge[0]][edge[1]]['polarity']
        
        if pol == -1 and  (work_graph.node[userA]['cluster'] == work_graph.node[userB]['cluster']):
            # same group, disagreement
            cur_price += (1-alpha)
        elif pol == 1 and  (work_graph.node[userA]['cluster'] != work_graph.node[userB]['cluster']):
            # dif groups, agreement
            cur_price += alpha
        else:
            # do not increase price
            pass
    print "clusters: ", t, "initial price", cur_price
    
    if cur_price < best_price:
        best_cluster_graph = work_graph.copy()
        best_price = cur_price
        
    # look at all neighbor clusters
    copy_graph = work_graph.copy()
    
    # while neigbors are better
    neighbor_better = True
    while neighbor_better == True:
        
        # set loop break
        neighbor_better = False

        # -1 try each node as switch
        for switch_node in copy_graph.nodes():
            # save cluster value
            save_val = copy_graph.node[switch_node]['cluster']

            # change cluster value 
            for cluster_val in [x for x in range(max_cluster_num+1)[2:] if x != save_val]:
                # try new value
                copy_graph.node[node]['cluster'] = cluster_val

                #compute new price
                neigh_price = 0
                for edge in copy_graph.edges():
                    userA, userB, pol = edge[0], edge[1], copy_graph.edge[edge[0]][edge[1]]['polarity']

                    if pol == -1 and (copy_graph.node[userA]['cluster'] == copy_graph.node[userB]['cluster']):
                        # same group, disagreement
                        neigh_price += (1-alpha)
                    elif pol == 1 and (copy_graph.node[userA]['cluster'] != copy_graph.node[userB]['cluster']):
                        # dif groups, agreement
                        neigh_price += alpha
                    else:
                        # do not increase price
                        pass
                                    
                # assign new values if better
                if neigh_price < cur_price:
                    cur_price = neigh_price
                    best_switch = [switch_node, cluster_val]
                    neighbor_better = True

                if cur_price < best_price:
                    best_cluster_graph = copy_graph.copy()
                    best_price = cur_price

                # reset value
                copy_graph.node[switch_node]['cluster'] = save_val


        # -2 try each edge as switch
        for switch_edge in copy_graph.edges():

            # switch values
            copy_graph.node[switch_edge[0]]['cluster'],copy_graph.node[switch_edge[1]]['cluster'] = copy_graph.node[switch_edge[1]]['cluster'],copy_graph.node[switch_edge[0]]['cluster']

            #compute new price
            neigh_price = 0
            for edge in copy_graph.edges():
                userA, userB, pol = edge[0], edge[1], copy_graph.edge[edge[0]][edge[1]]['polarity']

                if pol == -1 and (copy_graph.node[userA]['cluster'] == copy_graph.node[userB]['cluster']):
                    # same group, disagreement
                    neigh_price += (1-alpha)
                elif pol == 1 and (copy_graph.node[userA]['cluster'] != copy_graph.node[userB]['cluster']):
                    # dif groups, agreement
                    neigh_price += alpha
                else:
                    # do not increase price
                    pass 

            # assign new price if better
            if neigh_price < cur_price:
                cur_price = neigh_price 
                best_switch = switch_edge
                neighbor_better = True

            if cur_price < best_price:
                best_cluster_graph = copy_graph.copy()
                best_price = cur_price

            # reset values
            copy_graph.node[switch_edge[0]]['cluster'],copy_graph.node[switch_edge[1]]['cluster'] = copy_graph.node[switch_edge[1]]['cluster'],copy_graph.node[switch_edge[0]]['cluster']

        #apply switch
        if type(best_switch) == tuple:
            # best switch is edge
            copy_graph.node[best_switch[0]]['cluster'],copy_graph.node[best_switch[1]]['cluster'] = copy_graph.node[best_switch[1]]['cluster'],copy_graph.node[best_switch[0]]['cluster']
        elif type(best_switch) == list:
            # best switch is node'
            copy_graph.node[best_switch[0]]['cluster'] = best_switch[1]
        
        print neighbor_better

        print "best neighbor",cur_price


clusters:  2 initial price 79.5
True
best neighbor 75.5
True
best neighbor 72.5
True
best neighbor 70.5
True
best neighbor 68.5
True
best neighbor 66.5
True
best neighbor 65.0
True
best neighbor 63.5
True
best neighbor 62.0
True
best neighbor 61.5
True
best neighbor 60.5
True
best neighbor 59.5
True
best neighbor 58.5
True
best neighbor 57.5
True
best neighbor 56.5
True
best neighbor 55.5
True
best neighbor 54.5
True
best neighbor 53.5
True
best neighbor 52.5
True
best neighbor 51.5
True
best neighbor 50.5
True
best neighbor 49.5
True
best neighbor 48.5
True
best neighbor 48.0
True
best neighbor 47.5
False
best neighbor 47.5
clusters:  3 initial price 56.5
True
best neighbor 53.5
True
best neighbor 51.5
True
best neighbor 50.0
True
best neighbor 48.5
True
best neighbor 47.0
True
best neighbor 45.5
True
best neighbor 44.5
True
best neighbor 43.5
True
best neighbor 42.5
True
best neighbor 41.5
True
best neighbor 40.5
True
best neighbor 39.5
True
best neighbor 38.5
True
best neighbor 37.5
True
best neighbor 36.5
True
best neighbor 35.5
True
best neighbor 34.5
True
best neighbor 33.5
True
best neighbor 33.0
True
best neighbor 32.5
True
best neighbor 32.0
True
best neighbor 31.5
True
best neighbor 31.0
True
best neighbor 30.5
True
best neighbor 30.0
True
best neighbor 29.5
True
best neighbor 29.0
True
best neighbor 28.5
True
best neighbor 28.0
True
best neighbor 27.5
True
best neighbor 27.0
False
best neighbor 27.0
clusters:  4 initial price 51.5
True
best neighbor 48.5
True
best neighbor 46.0
True
best neighbor 44.5
True
best neighbor 43.0
True
best neighbor 41.5
True
best neighbor 40.0
True
best neighbor 39.0
True
best neighbor 38.0
True
best neighbor 37.0
True
best neighbor 36.0
True
best neighbor 35.0
True
best neighbor 34.0
True
best neighbor 33.0
True
best neighbor 32.0
True
best neighbor 31.0
True
best neighbor 30.0
True
best neighbor 29.5
True
best neighbor 29.0
True
best neighbor 28.5
True
best neighbor 28.0
True
best neighbor 27.5
True
best neighbor 27.0
True
best neighbor 26.5
True
best neighbor 26.0
True
best neighbor 25.5
True
best neighbor 25.0
True
best neighbor 24.5
True
best neighbor 24.0
True
best neighbor 23.5
True
best neighbor 23.0
True
best neighbor 22.5
False
best neighbor 22.5
clusters:  5 initial price 45.5
True
best neighbor 42.5
True
best neighbor 40.0
True
best neighbor 38.0
True
best neighbor 36.5
True
best neighbor 35.5
True
best neighbor 34.5
True
best neighbor 33.5
True
best neighbor 32.5
True
best neighbor 31.5
True
best neighbor 30.5
True
best neighbor 30.0
True
best neighbor 29.5
True
best neighbor 29.0
True
best neighbor 28.5
True
best neighbor 28.0
True
best neighbor 27.5
True
best neighbor 27.0
True
best neighbor 26.5
True
best neighbor 26.0
True
best neighbor 25.5
True
best neighbor 25.0
True
best neighbor 24.5
True
best neighbor 24.0
True
best neighbor 23.5
True
best neighbor 23.0
True
best neighbor 22.5
True
best neighbor 22.0
True
best neighbor 21.5
True
best neighbor 21.0
False
best neighbor 21.0

In [44]:
print best_price
for node in best_cluster_graph.nodes():
    print node, best_cluster_graph.node[node]['cluster']


 21.0
speedisavirus 2
Tripwire3 4
AdClemson 1
most_original_nick 4
MrGraeme 3
chuckaway9 1
artureposir 5
critfist 5
my__name__is 4
mrstickball 1
orru 1
NinjaDiscoJesus 3
sternee 2
kriegson 1
midasz 5
bigdeal42 3
puffpuff9 5
InternetPropagandist 5
Carter1116 5
Dividedstein 3
zolzks 3
Garet-Jax 4
mehdika 3
RabidRaccoon 3
itsfineitsgreat 3
umakemefunny 5
project_a_jackie 2
Lionstrong 2
readerseven 1
moushoo 2
triggerthedigger 2
JudLew 2
razerxs 4
sphere2040 5
iMark70 3
Romek_himself 3
jonzobot 2
Flower_Ninja 3
VulvaVulva 2
soon2beaher 2
akingpa 2
toomanynoobs 5
i-no-u-no-i-no 2
crazehoarse 1
koolaidkirby 2
yes_thats_right 2
kulkke 5
eazye187 1
Kahing 4
machinedog 4
Batatata 4
vecnyj 3
georgevader 3
Mosetsfire84 4
JohnMiltonJamesJoyce 1
ComplexityAhhoy 1
WiseChoices 4
Mordredbas 1
ZionistShark 2
bitofnewsbot 1
oneofmanyshills 5
50ShadesOfPatriotic 1
sansaset 2
KnotPtelling 3
dragon_nipples 1
holocauster-ride 3
Israil_Akbar 4
lukeyflukey 1
GhostOfWhatsIAName 4
Boris45 1
MonsieurAnon 2
tigersharkwushen_ 1
NSA_ActiveMonitor 4
jaaaack 3
CIKAFIUMPH 5
AngryVegetable 3
Tatalebuj 3
kefeer 5
kslusherplantman 5
ISeeSharp 3
gonnaupvote3 3
damndirtylies 4
mrnovember5 3
kwonza 3
jdscarface 3
DrBoomkin 1
WillRedditForBitcoin 3
jaywalker32 1
090078601 2
HunterSThompson_says 4
Loki-L 2
TheLastOfYou 1
RufusTheFirefly 3
CVSer 2
DarthQuerious 4
grammaryan 4
RaikerCat 2
Tachels 3
gethighandthink 1
poonhounds 3
TangoJager 5
Recoi1 2
Shoudlaz 2
DineLointHarpie 5
vainglory7 2
HitlersFleshlight 1
ThatGetItKid 3
Drak_is_Right 2
existentialadvisor 1
xHaGGeNx 1
MongolPerson 3
Smoke_Meth_in_Butt 3
Ameri-KKK-aSucksMan 1
i_love_hezbollah 4
DrSalted 3
bigplrbear 4
Reditski 3
richjew 3
rindindin 4
TheCeilingisGreen 3
QUAD_PENETRATION 4
_Perfectionist 5
tehbored 2
plato1123 5
an_actual_lawyer 3
putupyourdukes 1
Wisdom_from_the_Ages 3
Zodiac13 3
vintruvian 4
arqoi 1
DoremusJessup 3
PreciseCobra 1
hellzorak 2
TheLastSovietSniper 1
jwax33 1
NoHorseInThisRace 4
TheLightningbolt 3
TastyLipid 5
prutopls 5
orr250mph 1
Conflict_Observer 5
moskonia 3
Cantmemba 2
SilentTyrant1 3
guanaco55 2
xanadu_reloaded 2
vigorous 3
hpsyk 2
RoswellSpaceman 4
aminice 2
Blindbandit809 4
uncannylizard 1
cityofkern 1
Maso_del_Saggio 3
rosinthebow 3
randomfact8472 1
Oreo_Speedwagon 5
Computer_Name 4
Tacoman404 2
Hagtzel 4
Zenarchist 2
BlackTeaLeaves 1
Ithikari 4
Darius1618 2
dafez7 2
PixelBlaster 5
particle409 4
hippylarvae 5
retardcharizard 1
HoliHandGrenades 5
TranslationRussian 2
admronoc 2
I_love_Israel 5
Perniciouss 1
herticalt 4
LurkerEurope 5
ripcitybitch 4
InternetOfficer 3
klug3 3
rubyroacher 4
CitationX_N7V11C 3
ultraspontane 5
failbotron 5
improb 1
AlmostTheNewestDad 3
StabbiRabbi 2
varietygamer 3
vaguerant107 1
endprism 4
OB1_kenobi 2
JamesColesPardon 5
BrightTomorrow 5
DrivenDogged 3
Mad_Jukes 3
jkess04 1
BannedByZionists 5
lordderplythethird 1
TooLoudToSilence 2
Allthewaylive215 1
Slyndrr 1
AndrewFGleich 2
StargateParadox 2
Riash 2
Nautil 1
1x10_-24 1
fredy5 2
StevefromRetail 5
infiniZii 1
DoctorExplosion 5
Music_King 3
ahyuknyuk 2
EatingSandwiches1 3
chabanais 4
Sir_Beelzebub 1
FnordFinder 5
panzerlieder 4
jheohdgs 1
NewtonianCooking 4
Wagamaga 2
Sleekery 3
77bc 3
heavyyy 4
ZachofFables 3
trolls_brigade 1
Rosalee 2
Brett686 3
jackrousseau 5
zahrul3 1
lktgrss 4
bell_peppers_n_beef 2
fandrei 4
cock_pussy_up 4
quiteintriguing 2
madazzahatter 2
MaltyBeverage 1
Fuadius 3
ForFUCKSSAKE_ 5
steelnuts 5
boomanwho 5
Hamartolus 4
thatsmrdickface 2
busdriverbuddha 4
eskimobrother319 5

In [35]:
for edge in triangles_reddit_graph.edges():
    print type(edge)
    break
    
for node in triangles_reddit_graph.nodes():
    print type(node)
    break

print type(('test', 1)) == tuple


<type 'tuple'>
<type 'str'>
True

In [14]:
# Run OpinionFinder once from the shell on its bundled README.txt, using the same
# classpath, models (-m) and lexicons (-l) arguments as the per-edge labelling cell.
# NOTE(review): Windows-only as written (backslash paths, ';' classpath separator).
# !cd opinionfinderv2.0\
!java -classpath opinionfinderv2.0\lib\weka.jar;opinionfinderv2.0\lib\stanford-postagger.jar;opinionfinderv2.0\opinionfinder.jar opin.main.RunOpinionFinder opinionfinderv2.0\README.txt -m opinionfinderv2.0\models -l  opinionfinderv2.0\lexicons


preprocessor: processing opinionfinderv2.0\README.txt
cluefinder: processing opinionfinderv2.0\README.txt
rulebased: processing opinionfinderv2.0\README.txt
subjclassifier: processing opinionfinderv2.0\README.txt
polarityclassifier: processing opinionfinderv2.0\README.txt
SGMLOutput: processing opinionfinderv2.0\README.txt
Loading default properties from trained tagger C:\Users\FG\Desktop\PhD\Research\reddit\opinionfinderv2.0\models\english-left3words-distsim.tagger
Reading POS tagger model from C:\Users\FG\Desktop\PhD\Research\reddit\opinionfinderv2.0\models\english-left3words-distsim.tagger ... done [1.7 sec].
Picked up _JAVA_OPTIONS: -Xmx512M